# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import init_notebook_mode, plot, iplot
# Look up the latitude & longitude of a single location (quick geocoder check)
import geopy
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="app")
location = geolocator.geocode("Slovakia")
# geocode() yields None when nothing matches, so guard before reading attributes.
if location is None:
    print("No geocoding result for Slovakia")
else:
    print(location.latitude)
    print(location.longitude)
48.7411522 19.4528646
# Load the per-country daily COVID-19 counts from GitHub
import ssl
# NOTE(review): this replaces the process-wide default HTTPS context factory with
# the unverified one, so certificate checks are off for later downloads as well —
# confirm it is still required on this machine.
ssl._create_default_https_context = ssl._create_unverified_context
COUNTRIES_CSV_URL = 'https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv'
current_data = pd.read_csv(COUNTRIES_CSV_URL)
current_data.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 | 0 | 0 |
| 1 | 2020-01-23 | Afghanistan | 0 | 0 | 0 |
| 2 | 2020-01-24 | Afghanistan | 0 | 0 | 0 |
| 3 | 2020-01-25 | Afghanistan | 0 | 0 | 0 |
| 4 | 2020-01-26 | Afghanistan | 0 | 0 | 0 |
# Animated choropleth of confirmed cases, restricted to Europe (one frame per Date)
fig = px.choropleth(
    current_data,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    animation_frame='Date',
    scope='europe',
)
fig.update_layout(
    title='Choropleth map of confirmed cases in Europe',
    template="plotly_dark",
)
fig.show()
# Animated bubble map (scatter_geo) of confirmed cases over time
fig = px.scatter_geo(
    current_data,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    size='Confirmed',
    hover_name="Country",
    animation_frame='Date',
    title='Spread over Time',
)
# Hide the colour scale and use the dark template.
fig.update_layout(coloraxis_showscale=False, template="plotly_dark")
fig.show()
# Create dataframe
# `df` is a second name for `current_data` (plain assignment, no copy is made).
df = current_data
# Displays (rows, columns) when run as the last expression of a notebook cell.
df.shape
(55670, 5)
# Per-country maximum of each count column
count_columns = ['Confirmed', 'Recovered', 'Deaths']
per_country = df.groupby(['Country'])[count_columns]
df2 = per_country.max().reset_index()
df2.head()
| Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|
| 0 | Afghanistan | 42297 | 34721 | 1574 |
| 1 | Albania | 24731 | 12203 | 571 |
| 2 | Algeria | 62693 | 42325 | 2062 |
| 3 | Andorra | 5437 | 4332 | 75 |
| 4 | Angola | 12680 | 5927 | 308 |
# Per-country totals row for Slovakia
df2.loc[df2['Country'].eq('Slovakia')]
| Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|
| 154 | Slovakia | 76072 | 19757 | 366 |
# Daily rows for Slovakia
df_slovakia = df.loc[df['Country'].eq('Slovakia')]
df_slovakia.tail()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 45410 | 2020-11-05 | Slovakia | 68734 | 16426 | 286 |
| 45411 | 2020-11-06 | Slovakia | 71088 | 16990 | 317 |
| 45412 | 2020-11-07 | Slovakia | 73667 | 18354 | 351 |
| 45413 | 2020-11-08 | Slovakia | 75495 | 19481 | 351 |
| 45414 | 2020-11-09 | Slovakia | 76072 | 19757 | 366 |
# COVID-19 confirmed, recovered cases with deaths in Slovakia
def show_plot(df, fig_title):
    """Line-plot ``df`` against its 'Date' column and display the figure.

    No ``y=`` is passed, so pandas decides which columns are drawn.

    df: DataFrame that has a 'Date' column.
    fig_title: text used as the figure title.
    """
    axes = df.plot(x='Date', title=fig_title, linewidth=3, figsize=(15, 7))
    axes.grid()
    plt.show()


show_plot(df_slovakia, "COVID-19 in Slovakia")
# Deaths over time in Slovakia
ax = df_slovakia.plot(x='Date', y='Deaths', linewidth=2, figsize=(15, 7))
ax.set_ylabel('Count')
ax.set_title('COVID-19 deaths in Slovakia')
ax.legend(loc="upper left")
ax.grid()
# Daily rows for the US
df_us = df.loc[df['Country'].eq('US')]
df_us.tail()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 51856 | 2020-11-05 | US | 9643540 | 3781751 | 235014 |
| 51857 | 2020-11-06 | US | 9752360 | 3810791 | 236168 |
| 51858 | 2020-11-07 | US | 9880828 | 3851465 | 237204 |
| 51859 | 2020-11-08 | US | 9999644 | 3881491 | 237666 |
| 51860 | 2020-11-09 | US | 10111077 | 3928845 | 238256 |
# COVID-19 confirmed, recovered cases with deaths in the US
# Reuses the show_plot() helper defined with the Slovakia chart earlier in this
# file; an identical second definition here was redundant.
show_plot(df_us, "COVID-19 in US")
# COVID-19 deaths in the US
# Plot the US frame: the chart is titled for the US, so it must not draw df_slovakia.
df_us.plot(x = 'Date', y = 'Deaths', figsize = (15,7), linewidth = 2);
plt.ylabel('Count')
plt.title('COVID-19 deaths in US')
plt.legend(loc="upper left")
plt.grid()
# Geocode every country in df2 and store the result in Latitude / Longitude columns
from geopy.extra.rate_limiter import RateLimiter

geolocator = Nominatim(user_agent="app")
# One lookup is sent per country; space the requests at least a second apart
# so the geocoding service is not flooded.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

lat_lon = []
for country in df2['Country']:
    match = geocode(country)
    if match is None:
        # Keep a (lat, lon) pair even when nothing was found, so every entry
        # has the same shape and can be split into two columns below.
        lat_lon.append((np.nan, np.nan))
    else:
        lat_lon.append((match.latitude, match.longitude))

df2['Latitude'] = [lat for lat, _lon in lat_lon]
df2['Longitude'] = [lon for _lat, lon in lat_lon]
df2.head()
| Country | Confirmed | Recovered | Deaths | Latitude | Longitude | |
|---|---|---|---|---|---|---|
| 0 | Afghanistan | 42297 | 34721 | 1574 | 33.768006 | 66.238514 |
| 1 | Albania | 24731 | 12203 | 571 | 41.000028 | 19.999962 |
| 2 | Algeria | 62693 | 42325 | 2062 | 28.000027 | 2.999983 |
| 3 | Andorra | 5437 | 4332 | 75 | 42.540717 | 1.573203 |
| 4 | Angola | 12680 | 5927 | 308 | -11.877577 | 17.569124 |
# Plotting a map of current confirmed cases
import folium
current_map = folium.Map(location=[54, 15], tiles='openstreetmap', zoom_start=2)
# A marker needs real coordinates, so rows without a latitude/longitude are skipped.
located = df2.dropna(subset=['Latitude', 'Longitude'])
for idx, row in located.iterrows():
    # The popup carries the confirmed count, matching what this map is about.
    folium.Marker(
        [row['Latitude'], row['Longitude']],
        popup=str(row['Confirmed']),
    ).add_to(current_map)
current_map
# Map of current confirmed cases with markers grouped into clusters
from folium.plugins import MarkerCluster
current_map = folium.Map(location=[54,15], tiles='cartodbpositron', zoom_start=2)
mc = MarkerCluster()
for idx, row in df2.iterrows():
    marker = folium.Marker([row['Latitude'], row['Longitude']], popup=row['Confirmed'])
    marker.add_to(mc)
mc.add_to(current_map)
current_map
# Heat map of current confirmed cases (Confirmed is passed as the third column)
from folium.plugins import HeatMap
current_map = folium.Map(location=[54,15], zoom_start=2)
heat_points = df2[['Latitude', 'Longitude','Confirmed']]
heat_layer = HeatMap(data=heat_points, radius=15)
heat_layer.add_to(current_map)
current_map
# Load the Worldometer per-country table from a local CSV file
WORLDOMETER_CSV = '~/Documents/Github/various-projects/data/worldometer_data.csv'
worldometer = pd.read_csv(WORLDOMETER_CSV)
worldometer.head()
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.317063e+08 | 10557037 | 133427.0 | 245777.0 | 1322.0 | 6595348.0 | 41959.0 | 3715912.0 | 19077.0 | 31826.0 | 741.0 | 160475931.0 | 483789.0 | Americas |
| 1 | India | Asia | 1.384901e+09 | 8635754 | 44679.0 | 127615.0 | 511.0 | 8011844.0 | 54638.0 | 496295.0 | 8944.0 | 6236.0 | 92.0 | 119615857.0 | 86371.0 | South-EastAsia |
| 2 | Brazil | South America | 2.131064e+08 | 5701283 | 25517.0 | 162842.0 | 204.0 | 5064344.0 | NaN | 474097.0 | 8318.0 | 26753.0 | 764.0 | 21900000.0 | 102766.0 | Americas |
| 3 | France | Europe | 6.532608e+07 | 1829659 | 22180.0 | 42207.0 | 857.0 | 131920.0 | 2185.0 | 1655532.0 | 4750.0 | 28008.0 | 646.0 | 18249032.0 | 279353.0 | Europe |
| 4 | Russia | Europe | 1.459573e+08 | 1817109 | 20977.0 | 31161.0 | 368.0 | 1350741.0 | 15600.0 | 435207.0 | 2300.0 | 12450.0 | 213.0 | 65600000.0 | 449447.0 | Europe |
# Highlighting maximum values
# worldometer.style.background_gradient(cmap='RdPu')
# Helper for comparison bar charts
def plot(df, x, y, xaxis_label, yaxis_label, title):
    """Show a bar chart of the first 20 rows of ``df``, coloured by 'WHO Region'.

    df: DataFrame to chart; it must contain a 'WHO Region' column.
    x, y: column names for the two axes.
    xaxis_label, yaxis_label: axis captions.
    title: chart title.
    """
    # NOTE: this function shadows `plot` imported from plotly.offline at the top
    # of the file; the name is kept so existing calls continue to work.
    # Chart the frame that was passed in rather than the global `worldometer`.
    fig = px.bar(df.head(20), x=x, y=y, color='WHO Region')
    fig.update_layout(title=title, xaxis_title=xaxis_label, yaxis_title=yaxis_label)
    fig.show()
# Total Number of Cases
# The y column is 'TotalCases', so the axis caption names total cases
# rather than deaths per million.
plot(worldometer,'Country/Region','TotalCases','Country','Total Cases','Total Number of Cases')
# Top 15 countries by "Deaths/1M pop" and by "Tests/1M pop"
# (the variable names say "top10", but each frame holds 15 rows)
top10d_1m = worldometer.nlargest(15, "Deaths/1M pop")
top10t_1m = worldometer.nlargest(15, "Tests/1M pop")
# Bar chart: top 15 countries by "Deaths/1M pop"
plt.figure(figsize=(20, 7))
sns.barplot(data=top10d_1m, y="Deaths/1M pop", x='Country/Region');
# Bar chart: top 15 countries by "Tests/1M pop"
plt.figure(figsize=(20, 7))
sns.barplot(data=top10t_1m, y="Tests/1M pop", x='Country/Region');
# Worldometer row for Slovakia
slovakia = worldometer.loc[worldometer['Country/Region'].eq('Slovakia')]
slovakia
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 61 | Slovakia | Europe | 5460606.0 | 77123 | 1051.0 | 390.0 | 24.0 | 21718.0 | 1961.0 | 55015.0 | 119.0 | 14124.0 | 71.0 | 913096.0 | 167215.0 | Europe |
# 2x2 grid of single-bar charts for Slovakia: population, total cases, new cases, total deaths
sns.set_style('darkgrid')
f, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))
# Flattened in row-major order: [0,0], [0,1], [1,0], [1,1].
top_left, top_right, bottom_left, bottom_right = axes.ravel()
a_1 = sns.barplot(data=slovakia, x='Country/Region', y='Population', color="blue", ax=top_left)
a_2 = sns.barplot(data=slovakia, x='Country/Region', y='TotalCases', color="green", ax=top_right)
a_3 = sns.barplot(data=slovakia, x='Country/Region', y='NewCases', color="orange", ax=bottom_left)
b_1 = sns.barplot(data=slovakia, x='Country/Region', y='TotalDeaths', color="red", ax=bottom_right)
plt.show()
# Worldometer row for the USA
usa = worldometer.loc[worldometer['Country/Region'].eq('USA')]
usa
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 331706290.0 | 10557037 | 133427.0 | 245777.0 | 1322.0 | 6595348.0 | 41959.0 | 3715912.0 | 19077.0 | 31826.0 | 741.0 | 160475931.0 | 483789.0 | Americas |